import pandas as pd
import numpy as np
import os
# Hard-coded macOS location of the R installation. It is set before rpy2 is
# imported below -- presumably rpy2 reads R_HOME at import time; adjust the
# path on other platforms.
os.environ['R_HOME'] = '/Library/Frameworks/R.framework/Resources'
import rpy2
import rpy2.robjects
# R option warn=-1: negative values make R suppress its warnings.
rpy2.robjects.r['options'](warn=-1)
from rpy2.robjects.packages import importr
import rpy2.robjects.numpy2ri
# Load the R package RCIT so that its RCoT function can be looked up through
# rpy2.robjects.r in independence_test.
importr('RCIT')
# Turn on the numpy <-> R conversion so numpy arrays can be passed to R calls.
rpy2.robjects.numpy2ri.activate()

# Seed numpy's global RNG; both the per-call RCoT seed drawn in
# independence_test and the synthetic data generators draw from it.
np.random.seed(5)


def independence_test(X, x_, y_, z_,random_seed = True):
    '''
    Test (conditional) independence of two variables with RCoT from the R package RCIT.

    X:       is the whole data set (pandas dataframe, one column per variable,
             one row per observation)
    x_,y_:   are the coordinates (column positions) of the two variables for
             which to test independence, for example x_ = 0, y_ = 1
    z_:      is either one single coordinate or a list of coordinates of the
             conditioning set; pass None for an unconditional test
    random_seed:
             if True the seed handed to RCoT is drawn from numpy's global RNG
             (np.random.choice(1000)), otherwise the fixed seed 5 is used

    Observations (rows of X) that contain a missing value are dropped before
    the test is run.

    Example usage:
        Assume T is a pandas dataframe with X and Y as first (0) and second (1) column
        and the control set Z as additional columns
        Then X _||_ Y | Z
    independence_test(T, 0, 1, [2+n for n in range(length_of_variables_in_Z)], random_seed = True)

    Returns the p-value of the test as a float.
    small p-values correspond to low probability of independence
    Note that for robustness the test should be repeated multiple times
    '''

    seed = np.random.choice(1000) if random_seed else 5

    # Drop incomplete observations BEFORE transposing. Calling dropna() on the
    # transposed frame would remove whole variables instead and shift the
    # positions that x_, y_ and z_ refer to.
    array = np.array(X.dropna().transpose())
    x_vals = array[x_]
    y_vals = array[y_]

    rcot_fn = rpy2.robjects.r['RCoT']
    # Identity check: `z_ != None` is evaluated elementwise for numpy arrays
    # and cannot be used as a condition.
    if z_ is not None:
        # RCoT gets the conditioning set as (observations x variables).
        # np.fastCopyAndTranspose was removed in NumPy 2.0; .T.copy() is the
        # equivalent transposed copy.
        z_vals = array[z_].T.copy()
        rcot = rcot_fn(x_vals, y_vals, z_vals, seed = seed)
    else:
        rcot = rcot_fn(x_vals, y_vals, seed = seed)
    return float(rcot.rx2('p')[0])


######### Functions to create the synthetic data 
def FORK(NOBS):
    '''
    Draw NOBS samples from a fork structure X <- Z -> Y.

    Z is standard normal noise; X = Z + noise and Y = 0.5*Z + noise, with
    independent standard normal noise terms.

    Returns a tuple (frame, n_conditioning): a dataframe with the columns
    'X', 'Y', 'Z' and the number of columns after the first two.
    '''
    common_cause = np.random.normal(0, 1, NOBS)
    left = common_cause + np.random.normal(0, 1, NOBS)
    right = 0.5 * common_cause + np.random.normal(0, 1, NOBS)

    series = (left, right, common_cause)
    frame = pd.DataFrame(series).T
    frame.columns = ['X', 'Y', 'Z']

    # Everything past X and Y forms the conditioning set (just Z here).
    n_conditioning = len(frame.columns[2:])
    return frame, n_conditioning
def CHAIN(NOBS):
    '''
    Draw NOBS samples from a chain structure X -> Z -> Y.

    X is standard normal noise; Z = X + noise and Y = 0.5*Z + noise, with
    independent standard normal noise terms.

    Returns a tuple (frame, n_conditioning): a dataframe with the columns
    'X', 'Y', 'Z' and the number of columns after the first two.
    '''
    source = np.random.normal(0, 1, NOBS)
    mediator = source + np.random.normal(0, 1, NOBS)
    sink = 0.5 * mediator + np.random.normal(0, 1, NOBS)

    series = (source, sink, mediator)
    frame = pd.DataFrame(series).T
    frame.columns = ['X', 'Y', 'Z']

    # Everything past X and Y forms the conditioning set (just Z here).
    n_conditioning = len(frame.columns[2:])
    return frame, n_conditioning
def COLLIDER(NOBS):
    '''
    Draw NOBS samples from a collider structure X -> Z <- Y.

    X and Y are independent standard normal noise; Z = X + Y + noise with a
    further independent standard normal noise term.

    Returns a tuple (frame, n_conditioning): a dataframe with the columns
    'X', 'Y', 'Z' and the number of columns after the first two.
    '''
    cause_a = np.random.normal(0, 1, NOBS)
    cause_b = np.random.normal(0, 1, NOBS)
    common_effect = cause_a + cause_b + np.random.normal(0, 1, NOBS)

    series = (cause_a, cause_b, common_effect)
    frame = pd.DataFrame(series).T
    frame.columns = ['X', 'Y', 'Z']

    # Everything past X and Y forms the conditioning set (just Z here).
    n_conditioning = len(frame.columns[2:])
    return frame, n_conditioning